{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Matplotlib is building the font cache; this may take a moment.\n"
          ]
        }
      ],
      "source": [
        "import argparse\n",
        "\n",
        "from pathlib import Path\n",
        "\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "from matplotlib import pyplot as plt\n",
        "\n",
        "from sklearn.ensemble import RandomForestRegressor\n",
        "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error\n",
        "\n",
        "import mlflow\n",
        "import mlflow.sklearn"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "# Label column the regressor is trained to predict.\n",
        "TARGET_COL = \"cost\"\n",
        "\n",
        "# Feature columns. main() selects NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS,\n",
        "# in that order, so the order of the entries fixes the column order of X_train.\n",
        "NUMERIC_COLS = [\n",
        "    # trip-level columns\n",
        "    \"distance\", \"dropoff_latitude\", \"dropoff_longitude\", \"passengers\",\n",
        "    \"pickup_latitude\", \"pickup_longitude\",\n",
        "    # pickup_* columns (date/time parts, going by the names)\n",
        "    \"pickup_weekday\", \"pickup_month\", \"pickup_monthday\",\n",
        "    \"pickup_hour\", \"pickup_minute\", \"pickup_second\",\n",
        "    # dropoff_* columns (date/time parts, going by the names)\n",
        "    \"dropoff_weekday\", \"dropoff_month\", \"dropoff_monthday\",\n",
        "    \"dropoff_hour\", \"dropoff_minute\", \"dropoff_second\",\n",
        "]\n",
        "\n",
        "# Presumably nominal (unordered) categorical columns, going by the name - TODO confirm.\n",
        "CAT_NOM_COLS = [\"store_forward\", \"vendor\"]\n",
        "\n",
        "# Presumably ordinal categorical columns - TODO confirm; none are listed.\n",
        "CAT_ORD_COLS = []\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "# Define Arguments for this step\n",
        "\n",
        "class MyArgs:\n",
        "    \"\"\"Attribute bag standing in for parsed command-line arguments.\n",
        "\n",
        "    Every keyword passed to the constructor becomes an attribute of the\n",
        "    instance, e.g. ``MyArgs(a=1).a == 1``.\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, /, **kwargs):\n",
        "        # `self` is positional-only, so even a keyword named \"self\" is\n",
        "        # stored as an ordinary attribute instead of clashing with it.\n",
        "        self.__dict__.update(kwargs)\n",
        "\n",
        "args = MyArgs(\n",
        "    train_data=\"/tmp/prep/train\",\n",
        "    model_output=\"/tmp/train\",\n",
        "    regressor__n_estimators=500,\n",
        "    regressor__bootstrap=1,\n",
        "    regressor__max_depth=10,\n",
        "    # 1.0 lets every split consider all features, which is what the former\n",
        "    # \"auto\" setting meant for regressors; scikit-learn deprecated \"auto\"\n",
        "    # in 1.1 and rejects it from 1.3 on.\n",
        "    regressor__max_features=1.0,\n",
        "    regressor__min_samples_leaf=4,\n",
        "    regressor__min_samples_split=5,\n",
        ")\n",
        "\n",
        "# Create the output directory with pathlib (imported in the first cell);\n",
        "# `os` is not imported by this notebook, so os.makedirs fails on a fresh kernel.\n",
        "Path(args.model_output).mkdir(parents=True, exist_ok=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [],
      "source": [
        "\n",
        "def main(args):\n",
        "    '''Read the train dataset, fit a random forest, log to MLflow, save the model.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    args : object exposing the following attributes\n",
        "        train_data : path passed to pd.read_parquet.\n",
        "        model_output : directory path passed to mlflow.sklearn.save_model.\n",
        "        regressor__n_estimators, regressor__bootstrap, regressor__max_depth,\n",
        "        regressor__max_features, regressor__min_samples_leaf,\n",
        "        regressor__min_samples_split : RandomForestRegressor hyperparameters.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    None. The function works through side effects: it logs the\n",
        "    hyperparameters, the train-set metrics (r2, mse, rmse, mae) and the plot\n",
        "    regression_results.png (written to the current working directory) to\n",
        "    MLflow, then saves the fitted model under args.model_output.\n",
        "    '''\n",
        "\n",
        "    # Read train data\n",
        "    train_data = pd.read_parquet(Path(args.train_data))\n",
        "\n",
        "    # Split the data into input(X) and output(y)\n",
        "    y_train = train_data[TARGET_COL]\n",
        "    X_train = train_data[NUMERIC_COLS + CAT_NOM_COLS + CAT_ORD_COLS]\n",
        "\n",
        "    # Configure the Random Forest Regression Model.\n",
        "    # bootstrap is supplied as a 0/1 flag; coerce it to a real bool because\n",
        "    # recent scikit-learn releases validate this parameter as a boolean.\n",
        "    model = RandomForestRegressor(\n",
        "        n_estimators=args.regressor__n_estimators,\n",
        "        bootstrap=bool(args.regressor__bootstrap),\n",
        "        max_depth=args.regressor__max_depth,\n",
        "        max_features=args.regressor__max_features,\n",
        "        min_samples_leaf=args.regressor__min_samples_leaf,\n",
        "        min_samples_split=args.regressor__min_samples_split,\n",
        "        random_state=0,  # fixed seed for the forest's randomness\n",
        "    )\n",
        "\n",
        "    # log model hyperparameters (values are logged exactly as supplied)\n",
        "    mlflow.log_param(\"model\", \"RandomForestRegressor\")\n",
        "    mlflow.log_param(\"n_estimators\", args.regressor__n_estimators)\n",
        "    mlflow.log_param(\"bootstrap\", args.regressor__bootstrap)\n",
        "    mlflow.log_param(\"max_depth\", args.regressor__max_depth)\n",
        "    mlflow.log_param(\"max_features\", args.regressor__max_features)\n",
        "    mlflow.log_param(\"min_samples_leaf\", args.regressor__min_samples_leaf)\n",
        "    mlflow.log_param(\"min_samples_split\", args.regressor__min_samples_split)\n",
        "\n",
        "    # Train model with the train set\n",
        "    model.fit(X_train, y_train)\n",
        "\n",
        "    # Predict on the same rows the model was fitted on, so the metrics\n",
        "    # below are train-set scores, not an estimate of generalization.\n",
        "    yhat_train = model.predict(X_train)\n",
        "\n",
        "    # Evaluate Regression performance with the train set\n",
        "    r2 = r2_score(y_train, yhat_train)\n",
        "    mse = mean_squared_error(y_train, yhat_train)\n",
        "    rmse = np.sqrt(mse)\n",
        "    mae = mean_absolute_error(y_train, yhat_train)\n",
        "\n",
        "    # log model performance metrics\n",
        "    mlflow.log_metric(\"train r2\", r2)\n",
        "    mlflow.log_metric(\"train mse\", mse)\n",
        "    mlflow.log_metric(\"train rmse\", rmse)\n",
        "    mlflow.log_metric(\"train mae\", mae)\n",
        "\n",
        "    # Visualize results: predictions against real values plus the y = x line.\n",
        "    # The figure is left open on purpose so the notebook still shows it inline.\n",
        "    fig, ax = plt.subplots()\n",
        "    ax.scatter(y_train, yhat_train, color='black')\n",
        "    # Two endpoints are enough to draw the diagonal reference line.\n",
        "    lo, hi = y_train.min(), y_train.max()\n",
        "    ax.plot([lo, hi], [lo, hi], color='blue', linewidth=3)\n",
        "    ax.set_xlabel(\"Real value\")\n",
        "    ax.set_ylabel(\"Predicted value\")\n",
        "    ax.set_title(\"Train set: predicted vs. real value\")\n",
        "    fig.savefig(\"regression_results.png\")\n",
        "    mlflow.log_artifact(\"regression_results.png\")\n",
        "\n",
        "    # Save the model\n",
        "    # NOTE(review): depending on the installed mlflow version, save_model may\n",
        "    # reject a target directory that already exists or is not empty - confirm\n",
        "    # before re-running against a populated args.model_output.\n",
        "    mlflow.sklearn.save_model(sk_model=model, path=args.model_output)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "jupyter": {
          "outputs_hidden": false,
          "source_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Train dataset input path: /tmp/prep/train\n",
            "Model output path: /tmp/train\n",
            "n_estimators: 500\n",
            "bootstrap: 1\n",
            "max_depth: 10\n",
            "max_features: auto\n",
            "min_samples_leaf: 4\n",
            "min_samples_split: 5\n"
          ]
        },
        {
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAsWklEQVR4nO3deZxcdZnv8c/TnQSpBEjSCTtdjRLAoIAQGUBFZRkhIowKMzCdGDYbEoTMiAvazIjXm7mKDhpGiQQEgl0KiqAMgyAgi/cikY7sRCRC0iRGSFgDYUnSz/3jnOpUus+pOtVde33fr9d5pev8quo8XUnOU7/d3B0RERGAlmoHICIitUNJQUREBigpiIjIACUFEREZoKQgIiIDRlU7gJGYNGmSd3R0VDsMEZG6smTJkrXuPjmqrK6TQkdHB729vdUOQ0SkrpjZirgyNR+JiMgAJQURERmgpCAiIgOUFEREZICSgoiIDFBSEBGRAUoKIiJ1xB36+uCVV8rz/koKIiJ14ne/g5YWSKfhPe+BV18t/TXqevKaiEgz2LgR9t0Xli7dfG7lSvjrX2HbbUt7LdUURERq2E03wejRWyYEgKlTYe+9S389JQURkRr0xhswfjwcf/zQsosvhscfL8911XwkIlJjrroKTjstuuyVV0rfZJRLSUFEpEa8/DJMmBBdds01MHNm+WNQ85GISA246KLohDB5ctCUVImEAKopiIhU1erVsPPO0WU33wwf/3hl41FNQUSkSs47Lzoh7LdfMAy10gkBVFMQEam4ZctgypTosvvug0MOqWw8uVRTEBGpoH/+5+iEMH069PdXNyGAagoiIhXx4INwwAHRZY89BvvsU9l44pStpmBmV5rZ82b2WM65b5vZn8zsETO70czG55R9xcyWmdmTZvaxcsUlIlJJ/f3w4Q9HJ4TTTw8WuKuVhADlbT66Gjh60Lnbgfe4+77An4GvAJjZVOAkYJ/wNZeaWWsZYxMRKbu774bWVrj33qFly5fDFVdUOqLCypYU3P1e4MVB537j7hvDh/cDu4Y/Hw9c6+5vufszwDLgoHLFJiJSThs2BP0GH/3o0LILLghqB+l05eNKopp9CqcB14U/70KQJLJWhueGMLMuoAugvb29nPGJiBTtxhvhU5+KLluzBiZNqmw8xarK6CMz6wY2ApliX+vuC919mrtPmzx5cumDExEZhvXrYezY6IRwySVB7aDWEwJUoaZgZqcAxwJHuLuHp1cBu+U8bdfwnIhIzbv8cujqii5btw7GjatsPCNR0ZqCmR0NfAk4zt3X5xTdBJxkZluZ2e7AFOAPlYxNRKRYL70EZtEJ4Sc/CWoH9ZQQoLxDUn8K/B7Yy8xWmtnpwPeBbYDbzewhM/shgLs/DvwMeAK4FTjb3TeVKzYRkZGaNw8mThx6fued4a234OSTKx9TKdjmFpz6M23aNO/t7a12GCLSRFatgl13jS679Vb4WB3MsjKzJe4+LapMy1yIiCR07rnRCeH974dNm+ojIRSiZS5ERAr4859hr72iyxYvhoMaaFaVagoiIjHc4YQTohPC8ccHS1g0UkIA1RRERCL19gbNQlGWLoW9965sPJWimoKISI7+fjj00OiEcNZZQe2hURMCqKYgIjLgjjvgqKOiy/r6YLfdossaiWoKItL0NmyAjo7ohPD1rwe1g2ZICKCagog0uZ//HP7xH6PL1q6FtrbKxlNtqimISFN6/XUYNSo6ISxYENQOmi0hgJKCiDShBQuCNYk2DVpMZ/RoeO21oEO5Wan5SESaxgsvxC9f/bOfwYknVjaeWqSagog0ha9/PTohpNPBAnZKCAHVFESkoT37LMRt0njHHXDEEZWNp9appiAiDWvOnOiE8IEPBP0JSghDqaYgIg1n6VKYOjW67IEHYFrkotECqimISANxh+OOi04IJ54YLGGhhJCfagoi0hAWL4aDD44ue/JJ2HPPysZTr1
RTEJG6tmlT8O0/KiGcc05Qe1BCSE41BRGpW7fdBkcfHV22alWwX7IURzUFEak7b78d3PCjEsK8eUHtQAlheFRTEJG68pOfQGdndNmLL8KECZWNp9GopiAidWHdOjCLTggLFwa1AyWEkStbUjCzK83seTN7LOfcRDO73cyeCv+cEJ43M7vEzJaZ2SNmdkC54hKR+nPJJbDttkPPjx0brHb62c9WPqZGVc6awtXA4Ba/84E73X0KcGf4GOAYYEp4dAELyhiXiNSJNWuC2sHcuUPLfvGLYEXTVKrycTWysiUFd78XeHHQ6eOBReHPi4B/yDl/jQfuB8ab2U7lik1Eat8FF8D22w89v+eewU5pn/pU5WNqBpXuaN7B3VeHP/8N2CH8eRfg2ZznrQzPrUZEmsqKFcHWmFHuvhs+/OFKRtN8qtbR7O4OeLGvM7MuM+s1s941a9aUITIRqZYzzohOCB/5SLBEhRJC+VW6pvCcme3k7qvD5qHnw/OrgNxtsXcNzw3h7guBhQDTpk0rOqmISO157DF473ujyx58EPbfv6LhNLVK1xRuAmaFP88CfpVz/jPhKKSDgVdymplEpEG5wzHHRCeEk08OypUQKqtsNQUz+ynwEWCSma0EvgZ8E/iZmZ0OrACyW2bfAkwHlgHrgVPLFZeI1Ib77gv2NYjy1FOwxx6VjUcCZUsK7n5yTNGQbS3C/oWzyxWLiNSOTZvggAPgkUeGlp13HnznO5WPSTbTMhciUjG33AIf/3h02erVsOOOlY1HhtIyFyJSdm++CZMmRSeEiy4K+g6UEGqDagoiUlbXXAOzZkWXvfQSjB9f0XCkACUFESmLV1+F7baLLrvqKjjllIqGIwmp+UhESu7ii6MTwoQJsH69EkItU01BRErmuefi+wZuugk+8YnKxiPFU01BREriy1+OTgj77BMsYKeEUB9UUxCREXnmGXjnO6PL7r0XPvShysYjI6OagogM22c+E50QjjoqWMBOCaH+qKYgIkV7+OH4NYkefhj23bei4UgJqaYgIom5w5FHRieEz3wmKFdCqG+qKYhIIr/7HRx2WHTZ00/D7rtXNh4pj4I1BTPb08zuNLPHwsf7mtkF5Q9NRGrBxo0wdWp0Qvjyl4PagRJC40jSfHQ58BVgA4C7PwKcVM6gRKQ23HQTjB4NS5cOLXvuOfjmNysfk5RXkqSQcvc/DDq3sRzBiEhteOMN2HZbOP74oWUXXxzUDrbfvvJxSfkl6VNYa2bvItxP2cxOALQrmkiDuuoqOO206LJXXgmShTSuJEnhbII9kfc2s1XAM8CMskYlIhX38svB2kRRfvxjmKH/9U2hYFJw96eBI81sLNDi7uvKH5aIVNJFFwWdxoNtvz309cFWW1U+JqmOgknBzP590GMA3P1/lSkmEamQ1ath552jy/7nf2D69MrGI9WXpKP59ZxjE3AM0FHGmESkAj7/+eiEsP/+wTBUJYTmlKT56D9zH5vZd4DbyhaRiJTVsmUwZUp02X33wSGHVDYeqS3DWeYiBexa6kBEpPxOPjk6IUyfHixgp4QgSfoUHiUcjgq0ApMB9SeI1JEHH4QDDogue+yxYM8DEUg2JPXYnJ83As+5+4gmr5nZvwJnECSbR4FTgZ2Aa4E2YAkw093fHsl1RJpdfz989KPBvgaDnXEGXH555WOS2hbbfGRmE81sIrAu53gD2DY8PyxmtgtwLjDN3d9DUPs4CfgW8F133wN4CTh9uNcQEbj7bmhtjU4IK1YoIUi0fDWFJQTf5C2izIGYvZYSX3drM9tA0EexGjgc+OewfBFwIbBgBNcQaUobNsC73w1/+cvQsgsugG98o/IxSf2ITQruXpZ1D919VTiCqY+g5vEbggT0ck6z1Epgl6jXm1kX0AXQ3t5ejhBF6tYNN8CnPx1dtmYNTJpU2Xik/iQafWRmE8zsIDM7LHsM94JmNgE4Htgd2BkYCxyd9PXuvtDdp7n7tMmTJw83DJGGsn49pF
LRCeGSS4IF7JQQJIkko4/OAOYSDEN9CDgY+D1Bc89wHAk84+5rwve/AfgAMN7MRoW1hV2BVcN8f5GmsnAhnHlmdNm6dTBuXGXjkfqWpKYwF3g/sMLdPwq8D3h5BNfsAw42s5QFa2YcATwB3AWcED5nFvCrEVxDpOG99BKYRSeEn/40qB0oIUixkiSFN939TQAz28rd/wTsNdwLuvti4HrgjwTDUVsIVmH9MvB5M1tGMCz1R8O9hkijmzcPJkaMAdxlF3jrLThJ22DJMCVJCivNbDzwS+B2M/sVsGIkF3X3r7n73u7+Hnef6e5vufvT7n6Qu+/h7ie6+1sjuYZII1q1KqgdXBCxIe5tt8HKlTBmTOXjksrJZDJ0dHTQ0tJCR0cHmUympO+fZO2jT4Y/XmhmdwHbAbeWNAoRKeicc+D73x96/v3vh/vvh5bhLFojdSWTydDV1cX69esBWLFiBV1dXQB0dnaW5Brm7vmfYHYJcK2731eSK5bQtGnTvLe3t9phiJTVk0/C3ntHly1eDAcdVNl4pHo6OjpYsWJoQ006nWb58uWJ38fMlrj7tKiyJN8tlgAXmNlfzOw7Zhb5RiJSWu5wwgnRCeH444MlLJQQmktfX19R54ejYFJw90XuPp1gBNKTwLfM7KmSRSAiQ/T2Bs1Bv/jF0LKlS+GXvwz6FqS5xE3YLeVE3mJaIfcA9gbSwJ9KFoGIDOjvh0MPDfoJBps9O6g9xDUlSeObN28eqVRqi3OpVIp58+aV7BpJJq9dBHwS+AvBKqbfcPeXSxaBiABwxx1w1FHRZc8+C7tqF5Oml+1M7u7upq+vj/b2dubNm1eyTmZIVlP4C3CIux/t7lcrIYjEK3a4YCaTIZ2egllfTEL4Gul0B/fcU9phh1K/Ojs7Wb58Of39/SxfvrykCQGSDUm9rKRXFGlQxQ4XzGQynHbarbz9dlwXXRvwIitWUPJhhyJxCg5JrWUakiq1pJjhgq+/DuPGbSTqe5nZHNyHrhpf7LBDkTgjHZIqIgkkHS64YEF2TaLBCeFtYGxkQsj3/iKlFNt8VGh3NXd/sfThiNSv9vb2yJpCdrjgCy/kW776BCAYf9ra2sqmTZti30eknPLVFJYAveGfa4A/A0+FPy8pf2gitSuqQznfcMELL4xOCGbPAKPJJoRUKkVXV1fZhx2KxHL3vAdwOTA95/ExwGWFXleJ48ADD3SRSuvp6fFUKuUE29I64KlUynt6erynp8fT6bSbmafTaZ8//wYPZhcMPe64w4c8v6enZ+AaUedFSgHo9bh7flzBwBPg0STnqnEoKdS3er3xpdPpLRJC9kin01s878wzo5PBBz7gvmlTdWIXcc+fFAoOSQX+amYXAD3h407gr6Wpp0izqsRqj+VSqEN56VKYOjX6tQ88ANO0epjUsCSjj04GJgM3AjeEP59czqCk8XV3dw8khKz169fT3d1dpYiSi+vw3W23do47LjohnHhisIRFtRJCudfgl8aReJ6CmY1199fLHE9RNE+hfrW0tBD1b8/M6O/vr0JEyQ2u5QBstdVhvPXWPZHP//OfYcqUSkU3VFS8qVSKhQsX1nytTMpjRPMUzOxQM3sCWBo+3s/MLi1xjNJkKrHaY7l0dnaycOFC0uk00MqYMQ9HJoRzzw16EbIJoVrf1uu5ViZVENfZkD2AxcBuwIM55x4r9LpKHOporl/5RvCM9H0r1Xn9619HdySD+6pVQ+Mqx++bhJlFdoybWdmvLbWJEY4+Whz++WDOuYcLva4Sh5JCfSv1DbxSN94333TfccfoZDBvXvRrko5YKodqXltq00iTwvXAocAfCWbZfIFge04lBakplbj5ZTLxtYMXX4x/XTW/rVezliK1KV9SSDL66CzgbGAXYBWwPzAnwetEKipqiYl854uxbl2w01lUv+zllwdpYcKE+NdXsw8ltw/EzEin0+pkllhJksJe7t7p7ju4+/buPgN490guambjzex6M/uTmS01s0
PMbKKZ3W5mT4V/5vkvJjJUa2trUeeTuuQS2HbboefHjYP16+GMMwq/RyV2zMqn3GvwS+NIkhT+K+G5YswHbnX3vYH9CEY2nQ/c6e5TgDvDxyKJRS0iF3U+6SigNWuC2sHcuUPLbrwxqD1svXWy2PRtXepF7DwFMzuEoC/hX4Dv5hRtC3zS3fcb1gXNtgMeAt7pORc3syeBj7j7ajPbCbjb3ffK916apyC5kuxnkHTMfnc3/Md/DL3GnnvC44/DqCRrAYjUqOHOUxgDjCNYXnubnONVgnV+h2t3gpVWrzKzB83sCjMbC+zg7qvD5/wN2CHqxWbWZWa9Zta7Zs2aEYQhjSZJE02hMfvz5/8Ss+iEcPfd8OSTSgjS4OJ6oLMHkC70nGIOYBqwEfi78PF84BvAy4Oe91Kh99LoIxms0DDXfKOAPvzhpyJHFb373au9v79Kv9AI1OuCg1J+jHD00RVmNj77wMwmmNltI8hDK4GV7r44fHw9cADwXNhsRPjn8yO4hkik6NE+++Dezz337BFRtj/r1x+M2dCSWl5PKNtMtmLFCtx9YMHBWopRalRctvDN39gfTHKumAP4HcGoJoALgW+Hx/nhufOBiwq9j2oKkqunp8fHjBmzRQ1gzJgxW3xDHjpm/5aYeQc9eecS1PrYf01Yk3zIU1MouCCemS0h6FjuCx+ngRvd/YDhJiIz2x+4gqDf4mngVIL+jZ8B7cAK4B+9wJaf6miWXJMmTeKFF14Ycr6trY21a9cOPM5kMpx33i947rkbYt7pXQT/LAO5HdVZSTq1q6meFxyU8svX0Zyky6wb+L9mdg9gwIeArpEE5O4PEfQtDHbESN5XmltUQsg9n8lk+OpX/42+vl8SbAsy2HeAL25xJm4uQaE9Faqt0H7RInEK9im4+60Ebf7XAdcCB7r7SPoURCpuzpw5nH76L+jrexrYN+IZOzI4IQBsvfXWzJw5c4s+g0wmQ0tL9H+dWrnpVnuynNSxuHYlYO/wzwOijrjXVfJQn4LkIqINPTi2clgT03fwhdjXDR6plEqlfPbs2UP6EnLLa6VPwV2jjyQew+lTMLPL3f2zZnZXdC7xw0eakEZKfQoCwTf37u7umDWOZgLXxLxyO4JpN8m1trZGzpxubW1l0aJFmqEsdSFfn0LinddqkZJCc8ne/Pv6+mhvbx9oChk8QzmQnWcZ5RRgUUljUweu1JNhdTSb2afyvam7xw3dECm5wctTrFixghkzZsQ8+/PAf0acfxHYGXhr2HHE1RRqpS9BZKTyjT76RPjn9gRrIP02fPxR4D5ASUHKKrdm0NLSErvg3WbbA8/FlH0CuDmyxMxih2/mnk+lUsyaNYtFixYNWTtJHbjSKGJHH7n7qe5+KsHGOlPd/dPu/mlgn/CcSNnMmTOHmTNnDszILZwQvkl0QniM4LvP5oTQ1ta2xWqlZ511VuRInbPOOmvIqqaXXnqpVjuVxhbXA509gKWDHrcMPletQ6OPGlNPT0/sGkVDj91jd0KDDw55/uAZzrnX1EgdaRaMcEbz94EpwE/DU/8ELHP3c0qZnIZDHc2NKW628FCLgM9EnP8N8LEhZ9va2pg/f76+1UvTG+7S2QC4++eAHxJshrMfsLAWEoI0jsELyxVOCPsSfPGPSgjvJSoh9PT0sHbtWiUEkQKSrgz/R2Cdu99hZikz28bd15UzMGkOUaOK4jp+A3cQvRrK1QRLaEXr6gpWZlFSEMmvYE3BzD5LsLz1ZeGpXYBfljEmaSJRm95EJ4QPEdQOhiaEnXf+ID09oxk7dmzsdXI30smnlpfDFqmEJDWFs4GDgMUA7v6UmW1f1qikaRReQK6VYATR3kNKjj32cR599OP09fXR3b2SDRs2jOhaUbUW1TCk2STZZOctd387+8DMRhF8ZRMZltxv4/l9gmCTvqEJYdasL/Hb3x60xSYyb7/99pDn5So0wazQVp0izSBJTeEeM/sqsLWZHQXMAf67vGFJoxr8bT
zaOwjmHGwbUfavwPdYVOQqFUkmmNX6ctgilZCkpvBlYA3wKHAmcAtwQTmDksaSWzOYNWtWgYRwCvAG0QlhG+B7RV+/paUl0QSzuJqElrCQZpI3KZhZK8FEtcvd/UR3PyH8Wc1HUlAmk2GbbbZhxowZCWYmb0fQKnlVRNkMgv2dXis6hjFjxnDNNdck6hPQHgQiBZKCu28CnjQzfVWSomQyGU477TReey3JjfxLwMsR558DtgKiRwCZ2RaPU6kUs2fP3mIJiiuvvDJxJ3FnZ6eWsJCml2RG873A+4A/AK9nz7v7ceUNrTDNaK5dySah7QisjimbDvw69pWjR4/mjDPO4JZbbtliKW3dwEUKG+kezf9W4nikCRTunP1PgiWuB3uQYPvu+L0J8i1XEbXnghKFSHL59lN4B3AWsAdBJ/OP3H1jpQKT+pPJZJg7dy4vvPBCnme9C1gWU3YIcH/ea6TTaZYvXx57fc0zEBmZfH0Kiwi+sj0KHEP0riUiQLDU9YwZMwokhJ8QnRBuJuhIzp8QYHMNJGrmseYZiIxcvuajqe7+XgAz+xFBn0LJhCObeoFV7n6sme0OXAu0AUuAmbmT5qR2ZTIZfvjDH+Z5xv4EzUJRpgJLE1+rvb09tkYQN9RV8wxEkstXUxhYM6BMzUZz2fJu8C3gu+6+B/AScHoZriklkP2WbmaMGjWKGTNmxKxXZMA9RCeEy8Py5AkhOzw0rkbQ2toa+TrNMxBJLl9S2M/MXg2PdcC+2Z/NLG5H9ETMbFfg48AV4WMDDidYeA+Cpqt/GMk1pDyy39KzI4vi5x18hKCz+LCIsnagK9H1zGzI8NC4b/6bNm3SPAOREcq3HWeru28bHtu4+6icn6OmmxbjewSD07NDTNqAl3NqJCsJVmMdwsy6zKzXzHrXrFkzwjCkWFHf0rc0CngKuCui7BsEtYNnE18vqgYS980/mzg0z0Bk+JIsc1FSZnYs8Ly7LxnO6919obtPc/dpkydPLnF0Mlgmk2HSpEkD39jzzz34JEGr4x4RZZOAfx9WDNkF77q6ushkMnlnHnd2drJ8+XL6+/tZvny5EoJIkZJuslNKHwCOM7PpBCufbQvMB8ab2aiwtrArsKoKsUmO7KzkQquPwtbAWiAVUfY54AcFr/WOd7yDN998M+9zsiOJskNSNR9BpPQqXlNw96+4+67u3gGcBPzW3TsJ2htOCJ82C/hVpWMrp3ravCUb64wZMxIkhM8C6xmaEPqBcSRJCAA77LADPT09A00/cbL9CaoRiJSJu1ftIOiNvDn8+Z0Ew16XAT8Htir0+gMPPNDrQU9Pj6dSKSdY8c0BT6VS3tPTU+3QhoiKNfqY4OAxxz8leP2Wh5ltEUc6nY58Xjqdrs4HI9JAgF6Puy/HFdTDUS9JoZ5ucHGxbnl8NSYZPOswuuiEEPVZJEmkPT09nk6n3cw8nU7XZJIVqUVKClVmZom+HdeC/DfvnfPUDo4aVjLIV2vKd9Ovp9qXSK1RUqiyeqgp9PT0eFtbW56b9yUxyWCxQ3TSy3e0traO6Bt+PXymIrUqX1KoeEdzM6r1zVvyr1u0J8H99pyIsoOAvwvLi7Np0yZ+/OMfD7uTWFtnipSHkkIF1OrmLdk5CAsWLIh5xvXAkxHnbyCYhPbAiK6fnXcwHNo6U6RM4qoQ9XBUs/monjs5Z8+e7S0tLXmadw7M03ewV+ImovzXGFlzj/oURIYP9SmUVj3fkKZOnZrnJm0Ov49JBj8YVr9BoecM7mwvJtkW6oiu16QtUm5KCiVWj52cPT09Pnbs2Dw36CPy1A52KTohJD1yP7NSJdt6TtoilaCkUGL1MsQ0+205/415tMOKmGTwb2VLBlE36lIl23pM2iKVlC8pqKN5GOqhk3POnDnMnDmzwAJ2JwJvEyxlPdhEglVNi5dKpWhra4ssa21tje1sL9WIIo1MEhk+JYVhqPUhpplMhgULFgRVwUgpgtVMfxZRdhbByKKXirpmdo
Ob7M1+/vz5jB49eovnjB49mkWLFsWuV1SqZFsPSVukVikpDEOtDjHNOuOMM/KUzgZeZ+gCuW8CY4HLirpWOp3G3dm4cSPuvsXNfvDCdvkWuoPSJdtaT9oiNS2uXakejnqZ0VxJs2fPjmnDn5inI/nTJekTyDXcdv1SjRrS6COReOTpUzCPbWKofdOmTfPe3t5qh1F1++yzD0888USeZ3wNuDDi/NPAXkDyLbhbW1vp7+8vuIdBS0tLZPOVmdHf3x/xChGpFDNb4u7TosrUfFTHjjzySMwsT0LYleAL+oURZYcD76KYhJBKpfL2CeRSu75IfVJSqEOZTIaWlhbuvPPOPM9aQPReyL8j+Gu/q6hrtra2FtVvonZ9kfqkpFBnMpkMM2bMyDOyaG+C2sFZEWUHAoeF5cmNGTOGRYsWFdWRXuud8SISrRp7NEuRjjzyyAK1gqxfAcdFnL+OYOfT4Rluv1NnZ6eSgEidUU2hhs2ZMwczS5AQsstXRyWEKYwkIQBs2LCB7u7uEb2HiNQHJYUak8lk6OjowMzyLGmd1QL0AvdHlM0nmIS2rCRx5Z8ZLSKNQkmhhmQyGbq6uhLegD8GbCLoJxhsZ+BfShnawIzlassmzZaWFjo6Oobsx1CoXEQKiJvAUA9Ho01eK7x4HQ5jHP4aMwnt/LIuYFdthVY/1eqoIsmgBfFqXyaTSVBDOBl4C9gpomw88M1ShzWgtbW16t++u7u7Wb9+/Rbn1q9fP9DfUahcRAqreFIws93M7C4ze8LMHjezueH5iWZ2u5k9Ff45odKxVUt2j+R44wi++P4koux0gr6DV4q65rhx44p6/qZNm3B3VqxYMaJtNEei0OqnWh1VZOSqUVPYCJzn7lOBg4GzzWwqcD5wp7tPAe4MHzes5B3K5wLrIs6vA7YGrizqumPGjKGnp4eZM2cmen5UX0K1vn0XmiWtWdQiJRDXrlSpg2Bw/VEEO8TvFJ7bCXiy0GtruU9h8IJss2fPHnicfwe07DEpzwJ2xw+7XyC7IF2S/otsvFFl1dhQSH0KIqVBre68BnQAfcC2wMs55y33cdxRq0kh6uZU3PG/Y5LBUofC+x7nO7I387ib/eCbaa3tYlZo9VOtjipSWE0mBYKG8iXAp8LHLw8qfynmdV0Eg/N729vby/F5jViyUURRR3ue2sFhI0oGxdQU2tra9O1bpIHVXFIARgO3AZ/POVeR5qNyfpNMtidy3HFFTDK4syTJIHvku9mbmc+ePbuin5mIVF5NJQWCpqFrgO8NOv9t4Pzw5/OBiwq9V7FJoRzfenMTQaEmmehjnzy1g/1KmhAG3/Ab8WbfiL+TSKnVWlL4YHiTegR4KDymA20Eo46eAu4AJhZ6r2KTQqnbx0fed/DrmGTQU9Jk0Nra2hQ3RzV1iSSTLyk01c5rpdwNLJPJMGvWLDZt2lTU6wKHAv8vpuxdBDuilcbo0aO56qqrmmK10o6OjsgJgOl0muXLl1c+IJEapZ3XQqUax55do6j4hNACPEx0QvgOQcta6RJCW1tb3oTQaOsExc0I12J+Isk1VVKYPn165Pm1a9cWdWOMWk4hwdUJFrDbN6JsR+CLRb1bW1sbPT09kTOTU6kUPT09rF27Nm9CyC6+517dmcoiUkPi2pXq4ShVnwJFtkEX16G8lcOamL6DLwy7nyA3xuF0rtba/INSyPd5ichmqE8hENenMFihNui4tuuhZhIMtIqyHfBqgvcYauzYsbz22mvDem1WKftXaoWZxZbV879zkVJTn0Ioad9BdgG1uDb3V14ptPjctgRfUKMSwikEfQfDSwgtLS1cdtllw3ptrlpaJ6jR+jZE6lpcFaIejmKbj6ZOnZqoaaatrc3HjRs35HyyZqPPxzQVvRA2JQ1/aGnuTOORqpXhm6WMI25NqbFjx5YhcpH6RS3NUyjlUWxSSHLjHT16tI8aNWoYN+3tY5KBOxw74mRQDrUw0auUfRs9PT3e0tKyxfu0tLRono
LIIEoKmz+IvEc6nY6sIRQ+vhmTDB7xkS5glxtbI97cSr0Kay0kOpFaly8pjEIGjB07tsgx7bsTP6/gg8RPUCveihUrOPXUUwEaaiJae3t75Gc+3L6Nzs7Ohvp8RCqtqTqaC3niiSeKePY1RCeE2wg6kkuXELI2bNjA3LlzS/6+1TRv3jxSqdQW51KpFPPmzatSRCLNramSQktLKX7dfQlaOKJ2LnsvcHQJrhHvhRdeKOv7V1pnZycLFy4knU5jZqTTaRYuXKhv+yJV0lTzFPKNY0/mTuDwiPNXA6eO8L2Tq+e/MxGpPs1TGLGPEdQOohLC7lQyIQAayy8iZaOkkNdogmRwa0TZfxD0HSwvy5XHjBlDa2trZJnWKRKRclFSiPVF4O2Ysu2B7pJfMdu8lU6nufLKK1m0aBHpdDryuevXr6e7u/QxiEhz05DUIcYB62LKLgK+XJarptNp5s2bN6SDtbOzM3adouxyHCIipaKksIVLgHNiyiYB5Rn5Y2Z5F+Ar9Vh+EZE4aj4CYAeCvoOohPAtgr6D8g0FLXRz11h+EakUJQW+BPwtpiwFnF/Wqye5uWssv4hUShPPU9gRWB3zzM8BPxhWTKlUioULFwLQ1dW1xQ5tZsbhhx/OsmXL6Ovro729PbIfQUSknPLNU2jSPoVTgKtiykYRbJtZvKjO4u7ubiUAEakbTVVTaG1tJdhUbA0wcVDpwcDiomMYM2YMV155pW72IlI3NKM5dOaZZwL9wPM5Z28m6EhOlhBaWlpoa2sbaNtXQhCRRlJzzUdmdjQwH2gFrnD3b5bqvS+99FIALrvsQ/T3Hwr0An+NfO64ceN4/fXXmTgxqFG8+OKLagISkYZXU81HZtYK/Bk4ClgJPACc7O6Ra1oX23wkIiL11Xx0ELDM3Z9297eBa4HjqxyTiEjTqLWksAvwbM7jleG5AWbWZWa9Zta7Zs2aigYnItLoai0pFOTuC919mrtPmzx5crXDERFpKLWWFFYBu+U83jU8JyIiFVBrSeEBYIqZ7W5mY4CTgJuqHJOISNOoqSGp7r7RzD4H3EYwJPVKd3+8ymGJiDSNmkoKAO5+C3BLteMQEWlGNTVPoVhmtgYYutHAliYBaysQTjnV+++g+KtL8VdXLcafdvfIkTp1nRSSMLPeuEka9aLefwfFX12Kv7rqLf5a62gWEZEqUlIQEZEBzZAUFlY7gBKo999B8VeX4q+uuoq/4fsUREQkuWaoKYiISEJKCiIiMqChk4KZHW1mT5rZMjM7v9rxFGJmu5nZXWb2hJk9bmZzw/MTzex2M3sq/HNCtWPNx8xazexBM7s5fLy7mS0O/x6uC5cwqUlmNt7MrjezP5nZUjM7pJ4+fzP71/DfzmNm9lMze0etf/5mdqWZPW9mj+Wci/zMLXBJ+Ls8YmYHVC/ygVij4v92+G/oETO70czG55R9JYz/STP7WFWCzqNhk0K4Yc8PgGOAqcDJZja1ulEVtBE4z92nEmwafXYY8/nAne4+BbgzfFzL5gJLcx5/C/iuu+8BvAScXpWokpkP3OruewP7EfwedfH5m9kuwLnANHd/D8FSMSdR+5//1cDRg87FfebHAFPCowtYUKEY87maofHfDrzH3fcl2DjsKwDh/+eTgH3C11wa3qtqRsMmBepwwx53X+3ufwx/XkdwQ9qFIO5F4dMWAf9QlQATMLNdgY8DV4SPDTgcuD58Ss3Gb2bbAYcBPwJw97fd/WXq6PMnWLpmazMbBaSA1dT45+/u9wIvDjod95kfD1zjgfuB8Wa2U0UCjREVv7v/xt03hg/vJ1jxGYL4r3X3t9z9GWAZwb2qZjRyUii4YU8tM7MO4H3AYmAHd18dFv0N2KFacSXwPeBLQH/4uA14Oec/SC3/PewOrAGuCpu/rjCzsdTJ5+/uq4DvAH0EyeAVYAn18/nnivvM6/H/9WnAr8Ofaz7+Rk4KdcvMxgG/AP7F3V/NLfNgDHFNjiM2s2OB5919SbVjGa
ZRwAHAAnd/H/A6g5qKavzzn0DwTXR3YGdgLEObNepOLX/mhZhZN0GzcKbasSTVyEmhLjfsMbPRBAkh4+43hKefy1aRwz+fr1Z8BXwAOM7MlhM01x1O0EY/PmzOgNr+e1gJrHT3xeHj6wmSRL18/kcCz7j7GnffANxA8HdSL59/rrjPvG7+X5vZKcCxQKdvnhBW8/E3clKouw17wvb3HwFL3f3inKKbgFnhz7OAX1U6tiTc/Svuvqu7dxB83r91907gLuCE8Gm1HP/fgGfNbK/w1BHAE9TJ50/QbHSwmaXCf0vZ+Ovi8x8k7jO/CfhMOArpYOCVnGammmFmRxM0ox7n7utzim4CTjKzrcxsd4IO8z9UI8ZY7t6wBzCdoOf/L0B3teNJEO8HCarJjwAPhcd0gnb5O4GngDuAidWONcHv8hHg5vDndxL8w18G/BzYqtrx5Yl7f6A3/Dv4JTChnj5/4OvAn4DHgB8DW9X65w/8lKAPZANBbe30uM8cMIJRhX8BHiUYaVWL8S8j6DvI/j/+Yc7zu8P4nwSOqXb8gw8tcyEiIgMauflIRESKpKQgIiIDlBRERGSAkoKIiAxQUhARkQFKCtKUzGyTmT0Urib637mrWBb5PqeY2fdLEE9J3kdkpJQUpFm94e77e7Ca6IvA2dUOSKQWKCmIwO8JFyUzs3eZ2a1mtsTMfmdme4fnPxHuSfCgmd1hZrGL4plZi5ktH7SG/lNmtkOS9zGzq83shJzHr+X8/EUzeyBcp//rpfn1RTZTUpCmFq5lfwSbl0BZCJzj7gcCXwAuDc//X+BgDxbKu5ZgCYNI7t5PsCzDJ8Nr/B2wwt2fK+Z9ImL9e4JlEQ4imHl9oJkdlvT1IkmMKvwUkYa0tZk9RFBDWArcHq5Oeyjw82DpICBYJgKChcuuCxdnGwM8U+D9rwP+HbiKYB2o64b5Prn+PjweDB+PI0gS9xbxHiJ5qaYgzeoNd98fSBOsp3M2wf+Hl8O+huzx7vD5/wV8393fC5wJvKPA+/8e2MPMJhNsEJNd8TbJ+2wMY8HMWgiSB2Gc/ycntj3c/UfF/uIi+SgpSFPzYAXLc4HzgPXAM2Z2IgzsB7xf+NTt2LzE8awhbzT0fR24EbiYYNXbF4p4n+XAgeHPxwGjw59vA04LazSY2S5mtn2hWESKoaQgTc/dHyRYFfVkoBM43cweBh5n8xauFxI0Ky0B1iZ86+uAGWxuOkr6PpcDHw5jOIRgsx/c/TfAT4Dfm9mjBPs9bJMwFpFEtEqqiIgMUE1BREQGKCmIiMgAJQURERmgpCAiIgOUFEREZICSgoiIDFBSEBGRAf8fDLk13i9m1pQAAAAASUVORK5CYII=",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "needs_background": "light"
          },
          "output_type": "display_data"
        }
      ],
      "source": [
        "# Echo the run configuration so it is recorded in the cell output\n",
        "lines = [\n",
        "    f\"Train dataset input path: {args.train_data}\",\n",
        "    f\"Model output path: {args.model_output}\",\n",
        "    f\"n_estimators: {args.regressor__n_estimators}\",\n",
        "    f\"bootstrap: {args.regressor__bootstrap}\",\n",
        "    f\"max_depth: {args.regressor__max_depth}\",\n",
        "    f\"max_features: {args.regressor__max_features}\",\n",
        "    f\"min_samples_leaf: {args.regressor__min_samples_leaf}\",\n",
        "    f\"min_samples_split: {args.regressor__min_samples_split}\"\n",
        "]\n",
        "print(\"\\n\".join(lines))\n",
        "\n",
        "# The context manager ends the MLflow run even when main() raises, so a\n",
        "# failed training attempt does not leave an active run behind that would\n",
        "# make the next mlflow.start_run() call fail.\n",
        "with mlflow.start_run():\n",
        "    main(args)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            " Volume in drive C is Local Disk\n",
            " Volume Serial Number is 583C-74B4\n",
            "\n",
            " Directory of c:\\tmp\\train\n",
            "\n",
            "10/07/2022  12:16 AM    <DIR>          .\n",
            "10/07/2022  12:16 AM    <DIR>          ..\n",
            "10/07/2022  12:16 AM               160 conda.yaml\n",
            "10/07/2022  12:16 AM               409 MLmodel\n",
            "10/07/2022  12:16 AM        21,449,193 model.pkl\n",
            "10/07/2022  12:16 AM               128 python_env.yaml\n",
            "10/07/2022  12:16 AM                46 requirements.txt\n",
            "               5 File(s)     21,449,936 bytes\n",
            "               2 Dir(s)  788,160,786,432 bytes free\n"
          ]
        }
      ],
      "source": [
        "# List the saved model files with pathlib instead of the shell `ls`\n",
        "# automagic, and reuse args.model_output rather than repeating the path.\n",
        "sorted(entry.name for entry in Path(args.model_output).iterdir())"
      ]
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python38-azureml"
    },
    "kernelspec": {
      "display_name": "Python 3.9.6 64-bit",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.6"
    },
    "nteract": {
      "version": "nteract-front-end@1.0.0"
    },
    "vscode": {
      "interpreter": {
        "hash": "c87d6401964827bd736fe8e727109b953dd698457ca58fb5acabab22fd6dac41"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
